ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
/* FPU is set up to default initial state. */
- memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
/* Virtual IDT is empty at start-of-day. */
for ( i = 0; i < 256; i++ )
}
/* Ring 1 stack is the initial stack. */
- ctxt.kernel_ss = __KERNEL_DS;
- ctxt.kernel_esp = idle->thread.esp;
+ ctxt.kernel_ss = __KERNEL_DS;
+ ctxt.kernel_sp = idle->thread.esp;
/* Callback handlers. */
ctxt.event_callback_cs = __KERNEL_CS;
EVENT_MASK = (CS+4)
-ECF_IN_SYSCALL = (1<<8)
+VGCF_IN_SYSCALL = (1<<8)
/*
* Copied from arch/xen/i386/kernel/entry.S
* struct switch_to_user {
* u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
* } PACKED;
- * #define ECF_IN_SYSCALL (1<<8)
+ * #define VGCF_IN_SYSCALL (1<<8)
*/
.macro SWITCH_TO_USER flag
movl $0,%gs:pda_kernel_mode # change to user mode
jnz sysret_careful
XEN_UNBLOCK_EVENTS(%rsi)
RESTORE_ARGS 0,8,0
- SWITCH_TO_USER ECF_IN_SYSCALL
+ SWITCH_TO_USER VGCF_IN_SYSCALL
/* Handle reschedules */
/* edx: work, edi: workmask */
ctxt->user_regs.eflags = (1<<9) | (1<<2);
/* FPU is set up to default initial state. */
- memset(ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
+ memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
/* Virtual IDT is empty at start-of-day. */
for ( i = 0; i < 256; i++ )
ctxt->gdt_ents = 0;
/* Ring 1 stack is the initial stack. */
- ctxt->kernel_ss = FLAT_KERNEL_DS;
- ctxt->kernel_esp = vstartinfo_start + 2*PAGE_SIZE;
+ ctxt->kernel_ss = FLAT_KERNEL_DS;
+ ctxt->kernel_sp = vstartinfo_start + 2*PAGE_SIZE;
/* No debugging. */
memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
ctxt->user_regs.eflags = (1 << 9) | (1 << 2);
/* FPU is set up to default initial state. */
- memset(ctxt->fpu_ctxt, 0, sizeof (ctxt->fpu_ctxt));
+ memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
/* Virtual IDT is empty at start-of-day. */
for (i = 0; i < 256; i++) {
/* Ring 1 stack is the initial stack. */
/* put stack at top of second page */
ctxt->kernel_ss = FLAT_KERNEL_DS;
- ctxt->kernel_esp = ctxt->user_regs.esp;
+ ctxt->kernel_sp = ctxt->user_regs.esp;
/* No debugging. */
memset(ctxt->debugreg, 0, sizeof (ctxt->debugreg));
if ( image != NULL )
free(image);
- ctxt->flags = ECF_VMX_GUEST;
+ ctxt->flags = VGCF_VMX_GUEST;
/* FPU is set up to default initial state. */
- memset(ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
+ memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
/* Virtual IDT is empty at start-of-day. */
for ( i = 0; i < 256; i++ )
/* Ring 1 stack is the initial stack. */
/*
- ctxt->kernel_ss = FLAT_KERNEL_DS;
- ctxt->kernel_esp = vstartinfo_start;
+ ctxt->kernel_ss = FLAT_KERNEL_DS;
+ ctxt->kernel_sp = vstartinfo_start;
*/
/* No debugging. */
memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
#endif
#endif
- c->flags = 0;
- memcpy(&c->user_regs,
- &ed->arch.user_regs,
- sizeof(ed->arch.user_regs));
+ memcpy(c, &ed->arch.guest_context, sizeof(*c));
+
/* IOPL privileges are virtualised -- merge back into returned eflags. */
BUG_ON((c->user_regs.eflags & EF_IOPL) != 0);
c->user_regs.eflags |= ed->arch.iopl << 12;
#endif
#endif
+ c->flags = 0;
if ( test_bit(EDF_DONEFPUINIT, &ed->ed_flags) )
- c->flags |= ECF_I387_VALID;
- if ( KERNEL_MODE(ed, &ed->arch.user_regs) )
- c->flags |= ECF_IN_KERNEL;
+ c->flags |= VGCF_I387_VALID;
+ if ( KERNEL_MODE(ed, &ed->arch.guest_context.user_regs) )
+ c->flags |= VGCF_IN_KERNEL;
#ifdef CONFIG_VMX
if (VMX_DOMAIN(ed))
- c->flags |= ECF_VMX_GUEST;
+ c->flags |= VGCF_VMX_GUEST;
#endif
- memcpy(&c->fpu_ctxt,
- &ed->arch.i387,
- sizeof(ed->arch.i387));
- memcpy(&c->trap_ctxt,
- ed->arch.traps,
- sizeof(ed->arch.traps));
+
#ifdef ARCH_HAS_FAST_TRAP
if ( (ed->arch.fast_trap_desc.a == 0) &&
(ed->arch.fast_trap_desc.b == 0) )
c->fast_trap_idx = 0;
- else
- c->fast_trap_idx =
- ed->arch.fast_trap_idx;
#endif
- c->ldt_base = ed->arch.ldt_base;
- c->ldt_ents = ed->arch.ldt_ents;
+
c->gdt_ents = 0;
if ( GET_GDT_ADDRESS(ed) == GDT_VIRT_START(ed) )
{
l1e_get_pfn(ed->arch.perdomain_ptes[i]);
c->gdt_ents = GET_GDT_ENTRIES(ed);
}
- c->kernel_ss = ed->arch.kernel_ss;
- c->kernel_esp = ed->arch.kernel_sp;
- c->pt_base =
- pagetable_val(ed->arch.guest_table);
- memcpy(c->debugreg,
- ed->arch.debugreg,
- sizeof(ed->arch.debugreg));
-#if defined(__i386__)
- c->event_callback_cs = ed->arch.event_selector;
- c->event_callback_eip = ed->arch.event_address;
- c->failsafe_callback_cs = ed->arch.failsafe_selector;
- c->failsafe_callback_eip = ed->arch.failsafe_address;
-#elif defined(__x86_64__)
- c->event_callback_eip = ed->arch.event_address;
- c->failsafe_callback_eip = ed->arch.failsafe_address;
- c->syscall_callback_eip = ed->arch.syscall_address;
-#endif
+
+ c->pt_base = pagetable_val(ed->arch.guest_table);
+
c->vm_assist = ed->domain->vm_assist;
}
* #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically.
* If SS RPL or DPL differs from CS RPL then we'll #GP.
*/
- if (!(c->flags & ECF_VMX_GUEST))
+ if ( !(c->flags & VGCF_VMX_GUEST) )
+ {
if ( ((c->user_regs.cs & 3) == 0) ||
((c->user_regs.ss & 3) == 0) )
return -EINVAL;
+ }
clear_bit(EDF_DONEFPUINIT, &ed->ed_flags);
- if ( c->flags & ECF_I387_VALID )
+ if ( c->flags & VGCF_I387_VALID )
set_bit(EDF_DONEFPUINIT, &ed->ed_flags);
ed->arch.flags &= ~TF_kernel_mode;
- if ( c->flags & ECF_IN_KERNEL )
+ if ( c->flags & VGCF_IN_KERNEL )
ed->arch.flags |= TF_kernel_mode;
- memcpy(&ed->arch.user_regs,
- &c->user_regs,
- sizeof(ed->arch.user_regs));
-
- memcpy(&ed->arch.i387,
- &c->fpu_ctxt,
- sizeof(ed->arch.i387));
+ memcpy(&ed->arch.guest_context, c, sizeof(*c));
/* IOPL privileges are virtualised. */
- ed->arch.iopl = (ed->arch.user_regs.eflags >> 12) & 3;
- ed->arch.user_regs.eflags &= ~EF_IOPL;
+ ed->arch.iopl = (ed->arch.guest_context.user_regs.eflags >> 12) & 3;
+ ed->arch.guest_context.user_regs.eflags &= ~EF_IOPL;
/* Clear IOPL for unprivileged domains. */
- if (!IS_PRIV(d))
- ed->arch.user_regs.eflags &= 0xffffcfff;
+ if ( !IS_PRIV(d) )
+ ed->arch.guest_context.user_regs.eflags &= 0xffffcfff;
- if (test_bit(EDF_DONEINIT, &ed->ed_flags))
+ if ( test_bit(EDF_DONEINIT, &ed->ed_flags) )
return 0;
- memcpy(ed->arch.traps,
- &c->trap_ctxt,
- sizeof(ed->arch.traps));
-
if ( (rc = (int)set_fast_trap(ed, c->fast_trap_idx)) != 0 )
return rc;
- ed->arch.ldt_base = c->ldt_base;
- ed->arch.ldt_ents = c->ldt_ents;
-
- ed->arch.kernel_ss = c->kernel_ss;
- ed->arch.kernel_sp = c->kernel_esp;
-
+ memset(ed->arch.guest_context.debugreg, 0,
+ sizeof(ed->arch.guest_context.debugreg));
for ( i = 0; i < 8; i++ )
(void)set_debugreg(ed, i, c->debugreg[i]);
-#if defined(__i386__)
- ed->arch.event_selector = c->event_callback_cs;
- ed->arch.event_address = c->event_callback_eip;
- ed->arch.failsafe_selector = c->failsafe_callback_cs;
- ed->arch.failsafe_address = c->failsafe_callback_eip;
-#elif defined(__x86_64__)
- ed->arch.event_address = c->event_callback_eip;
- ed->arch.failsafe_address = c->failsafe_callback_eip;
- ed->arch.syscall_address = c->syscall_callback_eip;
-#endif
-
if ( ed->eid == 0 )
d->vm_assist = c->vm_assist;
}
#ifdef CONFIG_VMX
- if ( c->flags & ECF_VMX_GUEST )
+ if ( c->flags & VGCF_VMX_GUEST )
{
int error;
unsigned long start_stack,
unsigned long start_info)
{
- struct cpu_user_regs *regs = &d->arch.user_regs;
+ struct cpu_user_regs *regs = &d->arch.guest_context.user_regs;
/*
* Initial register values:
static void load_segments(struct exec_domain *p, struct exec_domain *n)
{
+ struct vcpu_guest_context *pctxt = &p->arch.guest_context;
+ struct vcpu_guest_context *nctxt = &n->arch.guest_context;
int all_segs_okay = 1;
/* Either selector != 0 ==> reload. */
- if ( unlikely(p->arch.user_regs.ds |
- n->arch.user_regs.ds) )
- all_segs_okay &= loadsegment(ds, n->arch.user_regs.ds);
+ if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
+ all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
/* Either selector != 0 ==> reload. */
- if ( unlikely(p->arch.user_regs.es |
- n->arch.user_regs.es) )
- all_segs_okay &= loadsegment(es, n->arch.user_regs.es);
+ if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
+ all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
/*
* Either selector != 0 ==> reload.
* Also reload to reset FS_BASE if it was non-zero.
*/
- if ( unlikely(p->arch.user_regs.fs |
- p->arch.user_regs.fs_base |
- n->arch.user_regs.fs) )
+ if ( unlikely(pctxt->user_regs.fs |
+ pctxt->fs_base |
+ nctxt->user_regs.fs) )
{
- all_segs_okay &= loadsegment(fs, n->arch.user_regs.fs);
- if ( p->arch.user_regs.fs ) /* != 0 selector kills fs_base */
- p->arch.user_regs.fs_base = 0;
+ all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
+ if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
+ pctxt->fs_base = 0;
}
/*
* Either selector != 0 ==> reload.
* Also reload to reset GS_BASE if it was non-zero.
*/
- if ( unlikely(p->arch.user_regs.gs |
- p->arch.user_regs.gs_base_user |
- n->arch.user_regs.gs) )
+ if ( unlikely(pctxt->user_regs.gs |
+ pctxt->gs_base_user |
+ nctxt->user_regs.gs) )
{
/* Reset GS_BASE with user %gs? */
- if ( p->arch.user_regs.gs || !n->arch.user_regs.gs_base_user )
- all_segs_okay &= loadsegment(gs, n->arch.user_regs.gs);
- if ( p->arch.user_regs.gs ) /* != 0 selector kills gs_base_user */
- p->arch.user_regs.gs_base_user = 0;
+ if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
+ all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
+ if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
+ pctxt->gs_base_user = 0;
}
/* This can only be non-zero if selector is NULL. */
- if ( n->arch.user_regs.fs_base )
+ if ( nctxt->fs_base )
wrmsr(MSR_FS_BASE,
- n->arch.user_regs.fs_base,
- n->arch.user_regs.fs_base>>32);
+ nctxt->fs_base,
+ nctxt->fs_base>>32);
/* Most kernels have non-zero GS base, so don't bother testing. */
/* (This is also a serialising instruction, avoiding AMD erratum #88.) */
wrmsr(MSR_SHADOW_GS_BASE,
- n->arch.user_regs.gs_base_kernel,
- n->arch.user_regs.gs_base_kernel>>32);
+ nctxt->gs_base_kernel,
+ nctxt->gs_base_kernel>>32);
/* This can only be non-zero if selector is NULL. */
- if ( n->arch.user_regs.gs_base_user )
+ if ( nctxt->gs_base_user )
wrmsr(MSR_GS_BASE,
- n->arch.user_regs.gs_base_user,
- n->arch.user_regs.gs_base_user>>32);
+ nctxt->gs_base_user,
+ nctxt->gs_base_user>>32);
/* If in kernel mode then switch the GS bases around. */
if ( n->arch.flags & TF_kernel_mode )
unsigned long *rsp =
(n->arch.flags & TF_kernel_mode) ?
(unsigned long *)regs->rsp :
- (unsigned long *)n->arch.kernel_sp;
+ (unsigned long *)nctxt->kernel_sp;
if ( !(n->arch.flags & TF_kernel_mode) )
toggle_guest_mode(n);
else
regs->cs &= ~3;
- if ( put_user(regs->ss, rsp- 1) |
- put_user(regs->rsp, rsp- 2) |
- put_user(regs->rflags, rsp- 3) |
- put_user(regs->cs, rsp- 4) |
- put_user(regs->rip, rsp- 5) |
- put_user(n->arch.user_regs.gs, rsp- 6) |
- put_user(n->arch.user_regs.fs, rsp- 7) |
- put_user(n->arch.user_regs.es, rsp- 8) |
- put_user(n->arch.user_regs.ds, rsp- 9) |
- put_user(regs->r11, rsp-10) |
- put_user(regs->rcx, rsp-11) )
+ if ( put_user(regs->ss, rsp- 1) |
+ put_user(regs->rsp, rsp- 2) |
+ put_user(regs->rflags, rsp- 3) |
+ put_user(regs->cs, rsp- 4) |
+ put_user(regs->rip, rsp- 5) |
+ put_user(nctxt->user_regs.gs, rsp- 6) |
+ put_user(nctxt->user_regs.fs, rsp- 7) |
+ put_user(nctxt->user_regs.es, rsp- 8) |
+ put_user(nctxt->user_regs.ds, rsp- 9) |
+ put_user(regs->r11, rsp-10) |
+ put_user(regs->rcx, rsp-11) )
{
DPRINTK("Error while creating failsafe callback frame.\n");
domain_crash();
regs->ss = __GUEST_SS;
regs->rsp = (unsigned long)(rsp-11);
regs->cs = __GUEST_CS;
- regs->rip = n->arch.failsafe_address;
+ regs->rip = nctxt->failsafe_callback_eip;
}
}
-static void save_segments(struct exec_domain *p)
+static void save_segments(struct exec_domain *ed)
{
- __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (p->arch.user_regs.ds) );
- __asm__ __volatile__ ( "movl %%es,%0" : "=m" (p->arch.user_regs.es) );
- __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (p->arch.user_regs.fs) );
- __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (p->arch.user_regs.gs) );
+ struct cpu_user_regs *regs = &ed->arch.guest_context.user_regs;
+ __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (regs->ds) );
+ __asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
+ __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
+ __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
}
static void clear_segments(void)
regs->rsp = stu.rsp;
regs->ss = stu.ss | 3; /* force guest privilege */
- if ( !(stu.flags & ECF_IN_SYSCALL) )
+ if ( !(stu.flags & VGCF_IN_SYSCALL) )
{
regs->entry_vector = 0;
regs->r11 = stu.r11;
static inline void switch_kernel_stack(struct exec_domain *n, unsigned int cpu)
{
struct tss_struct *tss = &init_tss[cpu];
- tss->esp1 = n->arch.kernel_sp;
- tss->ss1 = n->arch.kernel_ss;
+ tss->esp1 = n->arch.guest_context.kernel_sp;
+ tss->ss1 = n->arch.guest_context.kernel_ss;
}
#endif
static void __context_switch(void)
{
- struct cpu_user_regs *stack_ec = get_cpu_user_regs();
+ struct cpu_user_regs *stack_regs = get_cpu_user_regs();
unsigned int cpu = smp_processor_id();
struct exec_domain *p = percpu_ctxt[cpu].curr_ed;
struct exec_domain *n = current;
if ( !is_idle_task(p->domain) )
{
- memcpy(&p->arch.user_regs,
- stack_ec,
+ memcpy(&p->arch.guest_context.user_regs,
+ stack_regs,
CTXT_SWITCH_STACK_BYTES);
unlazy_fpu(p);
CLEAR_FAST_TRAP(&p->arch);
if ( !is_idle_task(n->domain) )
{
- memcpy(stack_ec,
- &n->arch.user_regs,
+ memcpy(stack_regs,
+ &n->arch.guest_context.user_regs,
CTXT_SWITCH_STACK_BYTES);
/* Maybe switch the debug registers. */
- if ( unlikely(n->arch.debugreg[7]) )
+ if ( unlikely(n->arch.guest_context.debugreg[7]) )
{
- loaddebug(&n->arch, 0);
- loaddebug(&n->arch, 1);
- loaddebug(&n->arch, 2);
- loaddebug(&n->arch, 3);
+ loaddebug(&n->arch.guest_context, 0);
+ loaddebug(&n->arch.guest_context, 1);
+ loaddebug(&n->arch.guest_context, 2);
+ loaddebug(&n->arch.guest_context, 3);
/* no 4 and 5 */
- loaddebug(&n->arch, 6);
- loaddebug(&n->arch, 7);
+ loaddebug(&n->arch.guest_context, 6);
+ loaddebug(&n->arch.guest_context, 7);
}
if ( !VMX_DOMAIN(n) )
* We're basically forcing default RPLs to 1, so that our "what privilege
* level are we returning to?" logic works.
*/
- ed->arch.failsafe_selector = FLAT_KERNEL_CS;
- ed->arch.event_selector = FLAT_KERNEL_CS;
- ed->arch.kernel_ss = FLAT_KERNEL_SS;
+ ed->arch.guest_context.kernel_ss = FLAT_KERNEL_SS;
for ( i = 0; i < 256; i++ )
- ed->arch.traps[i].cs = FLAT_KERNEL_CS;
+ ed->arch.guest_context.trap_ctxt[i].cs = FLAT_KERNEL_CS;
#if defined(__i386__)
+ ed->arch.guest_context.failsafe_callback_cs = FLAT_KERNEL_CS;
+ ed->arch.guest_context.event_callback_cs = FLAT_KERNEL_CS;
+
/*
* Protect the lowest 1GB of memory. We use a temporary mapping there
* from which we copy the kernel and ramdisk images.
if ( cpu_has_fxsr )
__asm__ __volatile__ (
"fxsave %0 ; fnclex"
- : "=m" (tsk->arch.i387) );
+ : "=m" (tsk->arch.guest_context.fpu_ctxt) );
else
__asm__ __volatile__ (
"fnsave %0 ; fwait"
- : "=m" (tsk->arch.i387) );
+ : "=m" (tsk->arch.guest_context.fpu_ctxt) );
clear_bit(EDF_USEDFPU, &tsk->ed_flags);
stts();
if ( cpu_has_fxsr )
__asm__ __volatile__ (
"fxrstor %0"
- : : "m" (tsk->arch.i387) );
+ : : "m" (tsk->arch.guest_context.fpu_ctxt) );
else
__asm__ __volatile__ (
"frstor %0"
- : : "m" (tsk->arch.i387) );
+ : : "m" (tsk->arch.guest_context.fpu_ctxt) );
}
/*
struct domain *d = ed->domain;
unsigned long gpfn, gmfn;
l1_pgentry_t l1e, nl1e;
- unsigned gva = ed->arch.ldt_base + (off << PAGE_SHIFT);
+ unsigned gva = ed->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
int res;
#if defined(__x86_64__)
okay = 0;
MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
}
- else if ( (ed->arch.ldt_ents != ents) ||
- (ed->arch.ldt_base != ptr) )
+ else if ( (ed->arch.guest_context.ldt_ents != ents) ||
+ (ed->arch.guest_context.ldt_base != ptr) )
{
invalidate_shadow_ldt(ed);
- ed->arch.ldt_base = ptr;
- ed->arch.ldt_ents = ents;
+ ed->arch.guest_context.ldt_base = ptr;
+ ed->arch.guest_context.ldt_ents = ents;
load_LDT(ed);
percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
if ( ents != 0 )
goto xen_fault;
#ifndef NDEBUG
- if ( (ed->arch.traps[trapnr].address == 0) && (ed->domain->id == 0) )
+ if ( (ed->arch.guest_context.trap_ctxt[trapnr].address == 0) &&
+ (ed->domain->id == 0) )
goto xen_fault;
#endif
- ti = current->arch.traps + trapnr;
+ ti = &current->arch.guest_context.trap_ctxt[trapnr];
tb->flags = TBF_EXCEPTION;
tb->cs = ti->cs;
tb->eip = ti->address;
panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
}
- ti = current->arch.traps + 3;
+ ti = &current->arch.guest_context.trap_ctxt[TRAP_int3];
tb->flags = TBF_EXCEPTION;
tb->cs = ti->cs;
tb->eip = ti->address;
struct exec_domain *ed = current;
struct trap_bounce *tb = &ed->arch.trap_bounce;
- ti = ed->arch.traps + 14;
+ ti = &ed->arch.guest_context.trap_ctxt[TRAP_page_fault];
tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
tb->cr2 = addr;
tb->error_code = error_code;
}
if ( unlikely(addr >= LDT_VIRT_START(ed)) &&
- (addr < (LDT_VIRT_START(ed) + (ed->arch.ldt_ents*LDT_ENTRY_SIZE))) )
+ (addr < (LDT_VIRT_START(ed) +
+ (ed->arch.guest_context.ldt_ents*LDT_ENTRY_SIZE))) )
{
/*
* Copy a mapping from the guest's LDT, if it is valid. Otherwise we
extern int map_ldt_shadow_page(unsigned int);
LOCK_BIGLOCK(d);
off = addr - LDT_VIRT_START(ed);
- addr = ed->arch.ldt_base + off;
+ addr = ed->arch.guest_context.ldt_base + off;
ret = map_ldt_shadow_page(off >> PAGE_SHIFT);
UNLOCK_BIGLOCK(d);
if ( likely(ret) )
goto xen_fault;
#ifndef NDEBUG
- if ( (ed->arch.traps[TRAP_page_fault].address == 0) && (d->id == 0) )
+ if ( (ed->arch.guest_context.trap_ctxt[TRAP_page_fault].address == 0) &&
+ (d->id == 0) )
goto xen_fault;
#endif
if ( (regs->error_code & 3) == 2 )
{
/* This fault must be due to <INT n> instruction. */
- ti = current->arch.traps + (regs->error_code>>3);
+ ti = &current->arch.guest_context.trap_ctxt[regs->error_code>>3];
if ( PERMIT_SOFTINT(TI_GET_DPL(ti), ed, regs) )
{
tb->flags = TBF_EXCEPTION;
#endif
#ifndef NDEBUG
- if ( (ed->arch.traps[TRAP_gp_fault].address == 0) &&
+ if ( (ed->arch.guest_context.trap_ctxt[TRAP_gp_fault].address == 0) &&
(ed->domain->id == 0) )
goto gp_in_kernel;
#endif
/* Pass on GPF as is. */
- ti = current->arch.traps + 13;
+ ti = &current->arch.guest_context.trap_ctxt[TRAP_gp_fault];
tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
tb->error_code = regs->error_code;
finish_propagation:
/* Prevent recursion. */
clts();
- if ( !test_bit(EDF_USEDFPU, &current->ed_flags) )
+ if ( !test_and_set_bit(EDF_USEDFPU, &current->ed_flags) )
{
if ( test_bit(EDF_DONEFPUINIT, ¤t->ed_flags) )
restore_fpu(current);
else
init_fpu();
- set_bit(EDF_USEDFPU, &current->ed_flags); /* so we fnsave on switch_to() */
}
if ( test_and_clear_bit(EDF_GUEST_STTS, &current->ed_flags) )
{
struct trap_bounce *tb = &current->arch.trap_bounce;
- tb->flags = TBF_EXCEPTION;
- tb->cs = current->arch.traps[7].cs;
- tb->eip = current->arch.traps[7].address;
+ tb->flags = TBF_EXCEPTION;
+ tb->cs = current->arch.guest_context.trap_ctxt[7].cs;
+ tb->eip = current->arch.guest_context.trap_ctxt[7].address;
}
return EXCRET_fault_fixed;
/* Mask out spurious debug traps due to lazy DR7 setting */
if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
- (ed->arch.debugreg[7] == 0) )
+ (ed->arch.guest_context.debugreg[7] == 0) )
{
__asm__("mov %0,%%db7" : : "r" (0UL));
goto out;
}
/* Save debug status register where guest OS can peek at it */
- ed->arch.debugreg[6] = condition;
+ ed->arch.guest_context.debugreg[6] = condition;
tb->flags = TBF_EXCEPTION;
- tb->cs = ed->arch.traps[1].cs;
- tb->eip = ed->arch.traps[1].address;
+ tb->cs = ed->arch.guest_context.trap_ctxt[TRAP_debug].cs;
+ tb->eip = ed->arch.guest_context.trap_ctxt[TRAP_debug].address;
out:
return EXCRET_not_a_fault;
long do_set_trap_table(trap_info_t *traps)
{
trap_info_t cur;
- trap_info_t *dst = current->arch.traps;
+ trap_info_t *dst = current->arch.guest_context.trap_ctxt;
long rc = 0;
LOCK_BIGLOCK(current->domain);
return -EINVAL;
}
- p->arch.debugreg[reg] = value;
+ p->arch.guest_context.debugreg[reg] = value;
return 0;
}
unsigned long do_get_debugreg(int reg)
{
if ( (reg < 0) || (reg > 7) ) return -EINVAL;
- return current->arch.debugreg[reg];
+ return current->arch.guest_context.debugreg[reg];
}
/*
case TYPE_MOV_TO_DR:
/* don't need to check the range */
if (reg != REG_ESP)
- ed->arch.debugreg[reg] = *reg_p;
+ ed->arch.guest_context.debugreg[reg] = *reg_p;
else {
unsigned long value;
__vmread(GUEST_ESP, &value);
- ed->arch.debugreg[reg] = value;
+ ed->arch.guest_context.debugreg[reg] = value;
}
break;
case TYPE_MOV_FROM_DR:
if (reg != REG_ESP)
- *reg_p = ed->arch.debugreg[reg];
+ *reg_p = ed->arch.guest_context.debugreg[reg];
else {
- __vmwrite(GUEST_ESP, ed->arch.debugreg[reg]);
+ __vmwrite(GUEST_ESP, ed->arch.guest_context.debugreg[reg]);
}
break;
}
OFFSET(EDOMAIN_processor, struct exec_domain, processor);
OFFSET(EDOMAIN_vcpu_info, struct exec_domain, vcpu_info);
- OFFSET(EDOMAIN_event_sel, struct exec_domain, arch.event_selector);
- OFFSET(EDOMAIN_event_addr, struct exec_domain, arch.event_address);
- OFFSET(EDOMAIN_failsafe_sel, struct exec_domain, arch.failsafe_selector);
- OFFSET(EDOMAIN_failsafe_addr, struct exec_domain, arch.failsafe_address);
OFFSET(EDOMAIN_trap_bounce, struct exec_domain, arch.trap_bounce);
OFFSET(EDOMAIN_thread_flags, struct exec_domain, arch.flags);
- OFFSET(EDOMAIN_kernel_ss, struct exec_domain, arch.kernel_ss);
- OFFSET(EDOMAIN_kernel_sp, struct exec_domain, arch.kernel_sp);
+ OFFSET(EDOMAIN_event_sel, struct exec_domain,
+ arch.guest_context.event_callback_cs);
+ OFFSET(EDOMAIN_event_addr, struct exec_domain,
+ arch.guest_context.event_callback_eip);
+ OFFSET(EDOMAIN_failsafe_sel, struct exec_domain,
+ arch.guest_context.failsafe_callback_cs);
+ OFFSET(EDOMAIN_failsafe_addr, struct exec_domain,
+ arch.guest_context.failsafe_callback_eip);
+ OFFSET(EDOMAIN_kernel_ss, struct exec_domain,
+ arch.guest_context.kernel_ss);
+ OFFSET(EDOMAIN_kernel_sp, struct exec_domain,
+ arch.guest_context.kernel_sp);
BLANK();
OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
if ( (ss & 3) != 1 )
return -EPERM;
- current->arch.kernel_ss = ss;
- current->arch.kernel_sp = esp;
+ current->arch.guest_context.kernel_ss = ss;
+ current->arch.guest_context.kernel_sp = esp;
t->ss1 = ss;
t->esp1 = esp;
if ( ldt )
{
table = (unsigned long *)LDT_VIRT_START(d);
- if ( idx >= d->arch.ldt_ents )
+ if ( idx >= d->arch.guest_context.ldt_ents )
goto fail;
}
else /* gdt */
if ( ldt )
{
table = (unsigned long *)LDT_VIRT_START(d);
- if ( idx >= d->arch.ldt_ents )
+ if ( idx >= d->arch.guest_context.ldt_ents )
{
DPRINTK("Segment %04x out of LDT range (%ld)\n",
seg, d->arch.ldt_ents);
/* If requested, give a callback on otherwise unused vector 15. */
if ( VM_ASSIST(d->domain, VMASST_TYPE_4gb_segments_notify) )
{
- ti = &d->arch.traps[15];
+ ti = &d->arch.guest_context.trap_ctxt[15];
tb = &d->arch.trap_bounce;
tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
tb->error_code = pb - eip;
if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
return -1;
- ti = p->arch.traps + idx;
+ ti = &p->arch.guest_context.trap_ctxt[idx];
/*
* We can't virtualise interrupt gates, as there's no way to get
if ( p == current )
CLEAR_FAST_TRAP(&p->arch);
- p->arch.fast_trap_idx = idx;
+ p->arch.guest_context.fast_trap_idx = idx;
p->arch.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
p->arch.fast_trap_desc.b =
(ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13;
if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
return -EPERM;
- d->arch.event_selector = event_selector;
- d->arch.event_address = event_address;
- d->arch.failsafe_selector = failsafe_selector;
- d->arch.failsafe_address = failsafe_address;
+ d->arch.guest_context.event_callback_cs = event_selector;
+ d->arch.guest_context.event_callback_eip = event_address;
+ d->arch.guest_context.failsafe_callback_cs = failsafe_selector;
+ d->arch.guest_context.failsafe_callback_eip = failsafe_address;
return 0;
}
OFFSET(EDOMAIN_processor, struct exec_domain, processor);
OFFSET(EDOMAIN_vcpu_info, struct exec_domain, vcpu_info);
- OFFSET(EDOMAIN_event_addr, struct exec_domain, arch.event_address);
- OFFSET(EDOMAIN_failsafe_addr, struct exec_domain, arch.failsafe_address);
- OFFSET(EDOMAIN_syscall_addr, struct exec_domain, arch.syscall_address);
OFFSET(EDOMAIN_trap_bounce, struct exec_domain, arch.trap_bounce);
OFFSET(EDOMAIN_thread_flags, struct exec_domain, arch.flags);
- OFFSET(EDOMAIN_kernel_sp, struct exec_domain, arch.kernel_sp);
+ OFFSET(EDOMAIN_event_addr, struct exec_domain,
+ arch.guest_context.event_callback_eip);
+ OFFSET(EDOMAIN_failsafe_addr, struct exec_domain,
+ arch.guest_context.failsafe_callback_eip);
+ OFFSET(EDOMAIN_syscall_addr, struct exec_domain,
+ arch.guest_context.syscall_callback_eip);
+ OFFSET(EDOMAIN_kernel_sp, struct exec_domain,
+ arch.guest_context.kernel_sp);
BLANK();
OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
*
* We also need the room, especially because orig_eax field is used
* by do_IRQ(). Compared the cpu_user_regs, we skip pushing for the following:
- * (13) u64 gs_base_user;
- * (12) u64 gs_base_kernel;
- * (11) u64 fs_base;
* (10) u64 gs;
* (9) u64 fs;
* (8) u64 ds;
* (2) u64 rip;
* (2/1) u32 entry_vector;
* (1/1) u32 error_code;
- * However, get_stack_bottom() actually returns 64 bytes before the real
- * bottom of the stack to allow space for:
- * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
*/
#define VMX_MONITOR_RFLAGS 0x202 /* IF on */
#define NR_SKIPPED_REGS 6 /* See the above explanation */
{
if ( (ss & 3) != 3 )
return -EPERM;
- current->arch.kernel_ss = ss;
- current->arch.kernel_sp = esp;
+ current->arch.guest_context.kernel_ss = ss;
+ current->arch.guest_context.kernel_sp = esp;
return 0;
}
switch ( which )
{
case SEGBASE_FS:
- ed->arch.user_regs.fs_base = base;
if ( wrmsr_user(MSR_FS_BASE, base, base>>32) )
ret = -EFAULT;
+ else
+ ed->arch.guest_context.fs_base = base;
break;
case SEGBASE_GS_USER:
- ed->arch.user_regs.gs_base_user = base;
if ( wrmsr_user(MSR_SHADOW_GS_BASE, base, base>>32) )
ret = -EFAULT;
+ else
+ ed->arch.guest_context.gs_base_user = base;
break;
case SEGBASE_GS_KERNEL:
- ed->arch.user_regs.gs_base_kernel = base;
if ( wrmsr_user(MSR_GS_BASE, base, base>>32) )
ret = -EFAULT;
+ else
+ ed->arch.guest_context.gs_base_kernel = base;
break;
case SEGBASE_GS_USER_SEL:
{
struct exec_domain *d = current;
- d->arch.event_address = event_address;
- d->arch.failsafe_address = failsafe_address;
- d->arch.syscall_address = syscall_address;
+ d->arch.guest_context.event_callback_eip = event_address;
+ d->arch.guest_context.failsafe_callback_eip = failsafe_address;
+ d->arch.guest_context.syscall_callback_eip = syscall_address;
return 0;
}
struct arch_exec_domain
{
- unsigned long kernel_sp;
- unsigned long kernel_ss;
+ struct vcpu_guest_context guest_context;
unsigned long flags; /* TF_ */
- /* Hardware debugging registers */
- unsigned long debugreg[8]; /* %%db0-7 debug registers */
-
- /* floating point info */
- struct i387_state i387;
-
- /* general user-visible register state */
- struct cpu_user_regs user_regs;
-
void (*schedule_tail) (struct exec_domain *);
- /*
- * Return vectors pushed to us by guest OS.
- * The stack frame for events is exactly that of an x86 hardware interrupt.
- * The stack frame for a failsafe callback is augmented with saved values
- * for segment registers %ds, %es, %fs and %gs:
- * %ds, %es, %fs, %gs, %eip, %cs, %eflags [, %oldesp, %oldss]
- */
-
- unsigned long event_selector; /* entry CS (x86/32 only) */
- unsigned long event_address; /* entry EIP */
-
- unsigned long failsafe_selector; /* entry CS (x86/32 only) */
- unsigned long failsafe_address; /* entry EIP */
-
- unsigned long syscall_address; /* entry EIP (x86/64 only) */
-
/* Bounce information for propagating an exception to guest OS. */
struct trap_bounce trap_bounce;
/* Trap info. */
#ifdef ARCH_HAS_FAST_TRAP
- int fast_trap_idx;
struct desc_struct fast_trap_desc;
#endif
- trap_info_t traps[256];
/* Virtual Machine Extensions */
struct arch_vmx_struct arch_vmx;
unsigned long guest_cr2;
/* Current LDT details. */
- unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
+ unsigned long shadow_ldt_mapcnt;
/* Next entry is passed to LGDT on domain switch. */
char gdt[10]; /* NB. 10 bytes needed for x86_64. Use 6 bytes for x86_32. */
} __cacheline_aligned;
struct desc_struct *desc;
unsigned long ents;
- if ( (ents = ed->arch.ldt_ents) == 0 )
+ if ( (ents = ed->arch.guest_context.ldt_ents) == 0 )
{
__asm__ __volatile__ ( "lldt %%ax" : : "a" (0) );
}
#define TRAP_deferred_nmi 31
/* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */
-/* NB. Same as ECF_IN_SYSCALL. No bits in common with any other TRAP_* defn. */
+/* NB. Same as VGCF_IN_SYSCALL. No bits in common with any other TRAP_ defn. */
#define TRAP_syscall 256
/*
#define IOBMP_BYTES 8192
#define IOBMP_INVALID_OFFSET 0x8000
-struct i387_state {
- u8 state[512]; /* big enough for FXSAVE */
-} __attribute__ ((aligned (16)));
-
struct tss_struct {
unsigned short back_link,__blh;
#ifdef __x86_64__
#ifdef ARCH_HAS_FAST_TRAP
#define SET_DEFAULT_FAST_TRAP(_p) \
- (_p)->fast_trap_idx = 0x20; \
+ (_p)->guest_context.fast_trap_idx = 0x20; \
(_p)->fast_trap_desc.a = 0; \
(_p)->fast_trap_desc.b = 0;
#define CLEAR_FAST_TRAP(_p) \
- (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
- 0, 8))
+ (memset(idt_tables[smp_processor_id()] + \
+ (_p)->guest_context.fast_trap_idx, \
+ 0, 8))
#define SET_FAST_TRAP(_p) \
- (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ (memcpy(idt_tables[smp_processor_id()] + \
+ (_p)->guest_context.fast_trap_idx, \
&((_p)->fast_trap_desc), 8))
long set_fast_trap(struct exec_domain *p, int idx);
/*
* Get the bottom-of-stack, as stored in the per-CPU TSS. This is actually
- * 64 bytes before the real bottom of the stack to allow space for:
- * domain pointer, DS, ES, FS, GS, FS_BASE, GS_BASE_OS, GS_BASE_APP
+ * 40 bytes before the real bottom of the stack to allow space for:
+ * domain pointer, DS, ES, FS, GS
*/
static inline unsigned long get_stack_bottom(void)
{
unsigned long p;
__asm__( "andq %%rsp,%0; addq %2,%0"
: "=r" (p)
- : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-64) );
+ : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-40) );
return p;
}
typedef u64 tsc_timestamp_t; /* RDTSC timestamp */
/*
- * The following is all CPU context. Note that the i387_ctxt block is filled
+ * The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
*/
typedef struct vcpu_guest_context {
-#define ECF_I387_VALID (1<<0)
-#define ECF_VMX_GUEST (1<<1)
-#define ECF_IN_KERNEL (1<<2)
- unsigned long flags;
+#define VGCF_I387_VALID (1<<0)
+#define VGCF_VMX_GUEST (1<<1)
+#define VGCF_IN_KERNEL (1<<2)
+ unsigned long flags; /* VGCF_* flags */
cpu_user_regs_t user_regs; /* User-level CPU registers */
- char fpu_ctxt[256]; /* User-level FPU registers */
+ struct { char x[512]; } fpu_ctxt /* User-level FPU registers */
+ __attribute__((__aligned__(16))); /* (needs 16-byte alignment) */
trap_info_t trap_ctxt[256]; /* Virtual IDT */
unsigned int fast_trap_idx; /* "Fast trap" vector offset */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
- unsigned long kernel_ss, kernel_esp; /* Virtual TSS (only SS1/ESP1) */
+ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
unsigned long pt_base; /* CR3 (pagetable base) */
unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
unsigned long event_callback_cs; /* CS:EIP of event callback */
unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
unsigned long failsafe_callback_eip;
unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
-} PACKED vcpu_guest_context_t;
+} vcpu_guest_context_t;
typedef struct {
/* MFN of a table of MFNs that make up p2m table */
u64 pfn_to_mfn_frame_list;
-} PACKED arch_shared_info_t;
+} arch_shared_info_t;
typedef struct {
-} PACKED arch_vcpu_info_t;
+} arch_vcpu_info_t;
#define ARCH_HAS_FAST_TRAP
* int HYPERVISOR_switch_to_user(void)
* All arguments are on the kernel stack, in the following format.
* Never returns if successful. Current kernel context is lost.
- * If flags contains ECF_IN_SYSCALL:
+ * If flags contains VGCF_IN_SYSCALL:
* Restore RAX, RIP, RFLAGS, RSP.
* Discard R11, RCX, CS, SS.
* Otherwise:
* All other registers are saved on hypercall entry and restored to user.
*/
/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
-#define ECF_IN_SYSCALL (1<<8)
+#define VGCF_IN_SYSCALL (1<<8)
struct switch_to_user {
/* Top of stack (%rsp at point of hypercall). */
u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
u64 ss;
u64 es;
u64 ds;
- u64 fs; /* Non-zero => takes precedence over fs_base. */
- u64 gs; /* Non-zero => takes precedence over gs_base_app. */
- u64 fs_base;
- u64 gs_base_kernel;
- u64 gs_base_user;
+ u64 fs; /* Non-zero => takes precedence over fs_base. */
+ u64 gs; /* Non-zero => takes precedence over gs_base_user. */
} cpu_user_regs_t;
typedef u64 tsc_timestamp_t; /* RDTSC timestamp */
/*
- * The following is all CPU context. Note that the i387_ctxt block is filled
+ * The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
*/
typedef struct vcpu_guest_context {
-#define ECF_I387_VALID (1<<0)
-#define ECF_VMX_GUEST (1<<1)
-#define ECF_IN_KERNEL (1<<2)
- unsigned long flags;
+#define VGCF_I387_VALID (1<<0)
+#define VGCF_VMX_GUEST (1<<1)
+#define VGCF_IN_KERNEL (1<<2)
+ unsigned long flags; /* VGCF_* flags */
cpu_user_regs_t user_regs; /* User-level CPU registers */
- char fpu_ctxt[512]; /* User-level FPU registers */
+ struct { char x[512]; } fpu_ctxt /* User-level FPU registers */
+ __attribute__((__aligned__(16))); /* (needs 16-byte alignment) */
trap_info_t trap_ctxt[256]; /* Virtual IDT */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
- unsigned long kernel_ss, kernel_esp; /* Virtual TSS (only SS1/ESP1) */
+ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
unsigned long pt_base; /* CR3 (pagetable base) */
unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
unsigned long event_callback_eip;
unsigned long failsafe_callback_eip;
unsigned long syscall_callback_eip;
unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
-} PACKED vcpu_guest_context_t;
+ /* Segment base addresses. */
+ u64 fs_base;
+ u64 gs_base_kernel;
+ u64 gs_base_user;
+} vcpu_guest_context_t;
typedef struct {
/* MFN of a table of MFNs that make up p2m table */
u64 pfn_to_mfn_frame_list;
-} PACKED arch_shared_info_t;
+} arch_shared_info_t;
typedef struct {
-} PACKED arch_vcpu_info_t;
+} arch_vcpu_info_t;
#endif /* !__ASSEMBLY__ */